library(nycflights13)
library(tidyverse)
## -- Attaching packages ------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 0.8.5
## v tidyr 1.0.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ---------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# filter() picks observations (rows) by their values. Its first argument is
# the data frame, the remaining arguments are conditions written in terms of
# the data frame's variables, and the result is another data frame.
jan1 <- flights %>%
  filter(month == 1, day == 1) # rows dated January 1st

# Assigning alone prints nothing. Naming the object afterwards displays it,
# which is what wrapping the whole assignment in parentheses also does.
dec25 <- flights %>%
  filter(month == 12, day == 25)
dec25
# Comparison operators: <, <=, >, >=, ==, !=
# Floating-point results are approximate, so compare them with near() instead
# of an exact test such as sqrt(2)^2 == 2.
near(x = sqrt(2)^2, y = 2)
## [1] TRUE
# Boolean operators: & is "and", | is "or", ! is "not".
# Use %in% to match against a set of values instead of writing
# month == 11 | 12.
nov_dec <- flights %>%
  filter(month %in% c(11, 12))
# Equivalent long form; both select the same rows:
# nov_dec <- filter(flights, month == 11 | month == 12)
# Flights that were not delayed by more than two hours on arrival or departure.
flights %>%
  filter(!(arr_delay > 120 | dep_delay > 120))
# The same filter rewritten with De Morgan's law: !(x | y) is !x & !y.
# The inner parentheses are optional because ! binds more loosely than >,
# but they make the grouping explicit.
flights %>%
  filter(!(arr_delay > 120) & !(dep_delay > 120))
# filter() keeps only the rows where the condition is TRUE, so rows where the
# condition evaluates to NA are dropped along with the FALSE ones.
df <- tibble(x = c(1, NA, 3))
df %>% filter(x > 1)
# To keep the missing values, ask for them explicitly.
df %>% filter(is.na(x) | x > 1)
# Exercise 1 ----
# (The exercise number used to be a bare `1`, which R evaluated and printed
# as `[1] 1`; it is now a comment heading.)

# a) Flights whose arr_delay is at least 120, i.e. two hours or more when the
#    delay is taken to be in minutes.
filter(flights, arr_delay >= 120)

# b) Flights whose destination (dest) is IAH or HOU.
filter(flights, dest %in% c("IAH", "HOU"))

# c) Flights whose carrier code is UA, AA or DL.
filter(flights, carrier %in% c("UA", "AA", "DL"))